library(sdm)
library(shiny)
library(sp)
library(gbm)
library(gam)
library(spThin)
library(sf)
library(rJava)
library(tidyverse)
library(MASS)
library(raster)
library(dismo)
library(usdm)
library(ggspatial)
library(patchwork)
library(parallel)

rm(list=ls()) # clear the global environment
dev.off()

# Environmental predictor variables
# load study area polygon
site = read_sf('D:/NgRobusta/Regions/Tz region.shp') %>% filter (Region_Nam=='Kagera')

site1 = read_sf('C:\\Users\\Mr. Kasongi\\Pictures\\output from melkamu\\Tz shapefiles\\Districts/Tz districts.shp') %>% 
  filter (District_N=='Mbinga')

# load predictor variables D:\NgRobusta
setwd('D:/NgRobusta/Robusta soil data1')
setwd('C:/PACSMAC/Objective 2 data/Soil_data/Averaged_soil_data_afric')
file = list.files(pattern='.tif')

soil = raster::stack(file)
soil = soil[[1:3]]
names(soil) = c('BD','CEC','PH')
soil = mask(crop(soil,site),site)
soil[[3]]=soil[[3]]/10

# load topographical variables
setwd('E:/Second PhD paper Data and R codes/Terrain_data/Derived_terrain_variables')
setwd('C:/PACSMAC/Objective 2 data/Terrain_data/Derived_terrain_variables')
setwd("C:/PACSMAC/Objective 2 data/Terrain_data")

file1 = list.files(pattern='.tif')
dem = raster(file1[1])
dem = mask(crop(dem,site),site)

slope = terrain(dem,'slope')
aspect=terrain(dem,'aspect')

topo = raster::stack(aspect,dem,slope)
names(topo) = c('aspect','Elvation','slope')


names(topo)

# historical GCM : time range  1970-2000
setwd('D:/Historical_GCM')
file2 = list.files(pattern='.tif')

climate = raster::stack(file2)

names(climate)

# shortening the names
names(climate) = c('bio1','bio2','bio3','bio4','bio5','bio6','bio7','bio8','bio9','bio10',
                   'bio11','bio12','bio13','bio14','bio15','bio16','bio17','bio18','bio19')
names(climate)

# mask to the extent of study areas
climate = mask(crop(climate,site),site)

names(climate)

# combine all predictor variables
pred = raster::stack(climate,topo,soil)

names(pred)

# check multi-collinearity of CGM data
pred_df = raster::extract(pred,th) %>% data.frame()

var = usdm::vifstep(pred_df)

var1= var@results
setwd("D:/NgRobusta")
write.csv(var1,'Robusta_multicollinearity_result.csv')
# retain variables without multi-collinearity problems 
pred = exclude(pred, var)

names(pred)

#------------spatial thinning
pp = read.csv('G:/Second PhD paper Data and R codes/Species_presence_data/Final_processed_dataset/Geopoints_Arabica_robusta_Cleaned_Processed_Final.csv')

pp = read.csv('C:/Users/Mr. Kasongi/Downloads/Geopoints_Arabica_robusta_Cleaned_Processed_Final.csv')


pp = pp %>% filter(District=='Mbinga') 

pp$X = as.numeric(pp$X)

pp = na.omit(pp)

set.seed(1)

thinned_df = thin(loc.data = pp,
                  lat.col = 'X',
                  long.col = 'Y',
                  spec.col = 'Species',
                  thin.par = 1,
                  reps = 1000,
                  locs.thinned.list.return = TRUE,
                  write.files = FALSE,
                  write.log.file = FALSE)

Traindata = thinned_df[[1]]

# create spatial dataframe

coordinates(Traindata) = ~ Longitude + Latitude

#---------------------------------create sdm data object
d = sdmData(PA~.,train=Traindata,predictors = pred,bg=list(n=1000))

#---------------------------------model fitting
m = sdm(PA~., d, methods =c('brt','rf','svm'),test.percent = 30, replication='sub',n=5)

gui(m) # visualize model performance using shiny app

# response curve
rcurve(m)
#r1 = rcurve(m,id=1:7)
#r2 = rcurve(m,id=8:15,main='')
r3 = r1/r2
r1 = rcurve(m,c('bio2','bio3','bio4','bio6','bio12','bio14','bio17'))
r2 = rcurve(m,c('bio18','bio19','aspect','slope','CEC','PH'), main='')
r3 = r1/r2

setwd("D:/NgRobusta")
ggsave('Robusta_response_curve.png',r3,width=10,height=8,units='in')

# ROC-AUC
roc(m,smooth=T)

# variable importance
varI = getVarImp(m)

# get data frame
VariabIm = data.frame(variable=varI@varImportanceMean[["AUCtest"]][["variables"]],
                      AUCmean= varI@varImportanceMean[["AUCtest"]][["AUCtest"]],
                      AUClower=varI@varImportanceMean[["AUCtest"]][["lower"]],
                      AUCupper=varI@varImportanceMean[["AUCtest"]][["upper"]])

# plot variable importance
p1 = ggplot(VariabIm,aes(x=reorder(variable,AUCmean),y=AUCmean))+geom_col(fill='purple1')+
  geom_errorbar(aes(ymin=AUClower,ymax=AUCupper))+
  coord_flip()+labs(x='Variables',y='Relative variable importance')+
  theme_minimal()+scale_y_continuous(expand = c(0,0.01),labels =scales::percent)+
  theme(axis.ticks.x = element_blank())

p1
setwd("D:/NgRobusta")
ggsave('D:/NgRobusta/variable_importance.png',p1,height=4,width=7,units='in')

#save the Dataframe of VI
write.csv(VariabIm,'D:/NgRobusta/arabica_VI.csv')

# multicollinearity result
#multicol = varI@result

# spatial prediction
p1 = predict(m,pred)

p1 = mask(crop(p1,site1),site1)

# Model Ensemble predictions
en1 = ensemble(m,p1,setting=list(method='weighted',stat='TSS'))

#--------------------evaluate model ensemble performance
ev1 = getEvaluation(m,stat=c('AUC','TSS','threshold'),opt = 2)

ensemble_thd = mean(ev1$threshold)

#--------------------create binary raster for ensemble
par(mfrow=c(1,2))

plot(en1,main='(a)')
plot(site1[0],add=T)

plot(en1>= ensemble_thd,main='(b)')
plot(site1[0],add=T)

current = en1>=ensemble_thd 


# save the current projection
setwd("D:/NgRobusta")
writeRaster(current,'D:/NgRobusta/current_Robusta_projection.tif')

# 
current1 = as.data.frame(as.data.frame(current,xy=T,period='current',GCM='current')%>%drop_na())

current1 = current1 %>% mutate(binary=ifelse(ensemble_weighted== TRUE,'Suitable','Unsuitable'))

#---------------------- response curves
#sdm::rcurve(m)

#-------------------------------------------------------------------------------
# process future climate scenarios and other biophysical data
# retain all needed GCMs

#'wc2.1_2.5m_bioc_BCC-CSM2-MR_',
#'wc2.1_30s_bioc_EC-Earth3-Veg_',
#'s_bioc_MPI-ESM1-2-HR_'
#'wc2.1_30s_bioc_HadGEM3-GC31-LL
setwd("D:/Future GCMs/Retained_GCM")
x = c('wc2.1_30s_bioc_HadGEM3-GC31-LL_',
      'wc2.1_30s_bioc_UKESM1-0-LL_',
      'wc2.1_30s_bioc_ACCESS-CM2_',
      'wc2.1_30s_bioc_MIROC6_',
      'wc2.1_30s_bioc_CMCC-ESM2_',
      'wc2.1_30s_bioc_INM-CM5-0_',
      'wc2.1_30s_bioc_IPSL-CM6A-LR_')

y = c('ssp245_2041-2060.tif','ssp245_2061-2080.tif')


for(i in 1:length(x)){
  
  for(j in 1:length(y)){
    
    setwd('D:/Future GCMs/Retained_GCM')
    
    
    if(!file.exists(paste0('D:\\NgRobusta\\ssp245\\',x[i],y[j]))){
      
      if(!file.exists(paste0('D:\\NgRobusta\\suitability index\\',x[i],y[j]))){
        
      # ssp 245
      files = list.files(pattern=paste0(x[i],y[j]))
      
      climatedata = raster::stack(files)
      
      names(climatedata)
      
      # shortening the names
      names(climatedata) = c('bio1','bio2','bio3','bio4','bio5','bio6','bio7','bio8',
                             'bio9','bio10','bio11','bio12','bio13','bio14','bio15',
                             'bio16','bio17','bio18','bio19')
      
      climatedata = mask(crop(climatedata,site),site)
      
      # combine all predictor variables
      predictors = raster::stack(climatedata,topo,soil)
      
      names(predictors)
      
      # retain variables 
      predictors = exclude(predictors, var)
      
      names(predictors)
      
      # projecting to future climate scenarios under SSP2-4.5
      
      project1 = predict(m,predictors)
      
      project1 = mask(crop(project1 ,site1),site1)
      
      ensemble1 = ensemble(m,project1,setting=list(method='weighted',stat='TSS',opt=2))
      
      # Save suitability index maps
      writeRaster(ensemble1,paste0('D:\\NgRobusta\\suitability index\\Robusta_',x[i],y[j]))
      
      ensemble2 = ensemble1 >= ensemble_thd 
      
      # Save suitability binary maps
      writeRaster(ensemble2,paste0('D:\\NgRobusta\\ssp245\\Robusta_',x[i],y[j]))}}
  }
}

# load stored binary projection maps and convert to dataframe
dat1 = data.frame()

x = c('Robusta_wc2.1_30s_bioc_HadGEM3-GC31-LL_',
        'Robusta_wc2.1_30s_bioc_UKESM1-0-LL_',
        'Robusta_wc2.1_30s_bioc_ACCESS-CM2_',
        'Robusta_wc2.1_30s_bioc_MIROC6_',
        'Robusta_wc2.1_30s_bioc_CMCC-ESM2_',
        'Robusta_wc2.1_30s_bioc_INM-CM5-0_',
        'Robusta_wc2.1_30s_bioc_IPSL-CM6A-LR_')

for(i in 1:length(x)){
  for(j in 1:length(y)){
    
  setwd('C:\\Users\\Mr. Kasongi\\Pictures\\output from melkamu\\ssp245')
  
  files = list.files(pattern=paste0(x[i],y[j]))
  
  dat = rast(files)
  
  d = as.data.frame(as.data.frame(dat,xy=T,period=y[j],GCM=x[i])%>%drop_na())
  
  d1 = d %>% mutate(binary=ifelse(ensemble_weighted>0,'Suitable','Unsuitable'))
  
  dat1 = rbind(dat1,d1)}
}

# retain all needed GCMs
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_HadGEM3-GC31-LL_'] = 'HadGEM3'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_MIROC6_'] = 'MIROC6'
#dat1$GCM[dat1$GCM=='s_bioc_MPI-ESM1-2-HR_'] = 'MPI'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_IPSL-CM6A-LR_'] = 'IPSL'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_ACCESS-CM2_'] = 'ACCESS'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_UKESM1-0-LL_'] = 'UKESM1'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_CMCC-ESM2_'] = 'CMCC'
#dat1$GCM[dat1$GCM=='wc2.1_2.5m_bioc_BCC-CSM2-MR_'] = 'BCC'
#dat1$GCM[dat1$GCM=='wc2.1_30s_bioc_EC-Earth3-Veg_'] = 'EC-Earth'
dat1$GCM[dat1$GCM=='Robusta_wc2.1_30s_bioc_INM-CM5-0_'] = 'INM'
#dat1$GCM[dat1$GCM=='wc2.1_30s_bioc_MRI-ESM2-0_'] = 'MRI'

dat1$period[dat1$period=='ssp245_2041-2060.tif']='2041-2060'
dat1$period[dat1$period=='ssp245_2061-2080.tif']='2061-2080'

# make GCM ensemble predictions from suitability index maps
z = c('2041-2060','2061-2080')

mean = data.frame()

for(h in 1:length(z)){
  
  setwd('C:\\Users\\Mr. Kasongi\\Pictures\\output from melkamu\\suitability index ssp245')
  
  files1 = list.files(pattern=z[h])
  
  ens = raster::stack(files1)
  
  ens_mean = mean(ens)
  
  ens_mean1 = ens_mean>= ensemble_thd
  
  writeRaster( ens_mean1,paste0('C:\\Users\\Mr. Kasongi\\Pictures\\output from melkamu\\Robusta_ensemble_projection_ssp245_',z[h],'.tif'),overwrite=T)
  
  ens_mean2 = as.data.frame(as.data.frame(ens_mean1,xy=T)%>%drop_na())
  
  names(ens_mean2)[3] = 'ensemble_weighted'
  
  ens_mean2= ens_mean2 %>% mutate(binary=ifelse(ensemble_weighted == TRUE,'Suitable','Unsuitable'))
  
  ens_mean2$period = z[h]
  
  ens_mean2$GCM = 'Mean'
  
  mean = rbind(ens_mean2,mean)
  
}

# merge current, individual GCM projections and ensemble GCM projections
merged = rbind(current1,dat1,mean)

merged$period = factor(merged$period,levels = c('current','2041-2060','2061-2080'))

write.csv(merged,'merged_robusta_ssp245.csv')

########################################################################

# Area change analysis
Area = data.frame(
  
      # retain all needed GCMs
      GCM = c('current',
              'HadGEM3','HadGEM3',
              'ACCESS','ACCESS',
              'MIROC6','MIROC6',
              'IPSL','IPSL',
              'UKESM1','UKESM1',
              'CMCC','CMCC',
              'INM','INM',
              'Mean','Mean'),
      
      period=c('Current',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080',
               '2041-2060','2061-2080'),
        
      # follow same order as in GCM colum created above
      suitable_area=c(nrow(filter(merged,GCM=='current'& binary=='Suitable')),
                                      nrow(filter(merged,GCM=='HadGEM3'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='HadGEM3'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='ACCESS'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='ACCESS'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='MIROC6'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='MIROC6'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='IPSL'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='IPSL'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='UKESM1'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='UKESM1'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='CMCC'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='CMCC'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='INM'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='INM'& period=='2061-2080' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='Mean'& period=='2041-2060' & binary =='Suitable')),
                                      nrow(filter(merged,GCM=='Mean'& period=='2061-2080' & binary =='Suitable'))
                                      ))

# percantage change in suitability area relative to the current suitability area size
Area= Area %>% mutate(prop=(suitable_area-Area[1,3])/Area[1,3])

Area$period = factor(Area$period,levels = c('Current','2041-2060','2061-2080'))

Area$GCM = factor(Area$GCM,levels=c('current','HadGEM3','ACCESS','MIROC6','IPSL','UKESM1','CMCC','INM','Mean'))

current_1 = filter(Area,GCM=='current')

# plotting the results
p2 = ggplot(data=filter(Area,GCM!='current'),aes(GCM,suitable_area,fill=period))+
  geom_col(data=current_1,aes(GCM,suitable_area),fill='green3',width=0.4)+
  geom_col(width=0.4,position = 'dodge')+
  geom_text(aes(label=suitable_area),position=position_dodge(width=0.5),vjust=-0.5,size=2)+
  geom_text(data=current_1,aes(label=suitable_area),vjust=-0.5,size=2)+
  labs(y=expression(Total~potential~suitable~area~'('~km^2~')'),x='',title='(a)',fill='')+
  theme_bw()+
  theme(legend.position = 'none')+
  scale_fill_manual(values = c('indianred1','cyan3',''))+
  scale_y_continuous(limits = c(0,2650))
  
p3 = ggplot(filter(Area,GCM!='current'),aes(GCM,prop,fill=period))+
  geom_col(width=0.7,position = 'dodge')+
  geom_text(aes(label=scales::percent(prop,accuracy = 0.1)),position=position_dodge(width=0.5),vjust=1.2,size=2)+
  scale_y_continuous(labels = scales::percent)+
  labs(y=expression(Change~'in'~area~suitability),x='',title='(b)',fill='')+
  theme_bw()+
  theme(legend.position = 'bottom')

p4 = p2/p3

p4

ggsave('change in area Robusta projection ssp245.png',p4,height=6,width=9,units='in',dpi=320)
